/*
	Purpose: Using the sample of 1940 Census fathers aged
				30-50 (created in 0a), this file further restricts
				to black and white fathers. Other variables 
				necessary to create average predicted father income 
				at various levels (files 1a-1f) are also cleaned.

	Creates: Census1940_fathers_ages30to50_forIncomeScores.dta
	         Census1940_fathers_ages30to50.dta
*/

* Start from an empty dataset and disable -more- pauses so the script runs without interruption
clear
set more off
* Relative paths used below are resolved from this folder (rooted at the Mydirectory1 global)
cd "$Mydirectory1/1_DataSources/CensusData/"

* Load the raw fathers extract (per the file header: 1940 Census fathers aged 30-50, created in 0a)
use ./input/Census1940_full_raw_fathers30to50.dta, clear 

* Income clean-up. First report how many zeros each income measure contains.
	foreach incvar of varlist incwage fam_income hh_income {
		count if `incvar' == 0
	}

/* A family or household total of 0 where the count of members with missing
   income equals the count of adults in the income universe means every
   component was 0 or missing. Treat those totals as missing (".") rather
   than as true zeros. */
	replace fam_income = . if fam_income == 0 & number_missing_inc_fam == number_adult_universe_fam
	replace hh_income = . if hh_income == 0 & number_missing_inc_hh == number_adult_universe_HH

* Sample restriction: the father's own wage income must be observed and non-zero
	keep if !missing(incwage) & incwage != 0
	* Sanity check: no remaining father may have a household income of 0 or "."
	assert !(hh_income == 0 | hh_income == .)

/*
   Convert income variables from 1940 dollars to 1950 dollars using the CPI
   (1940 = 14, 1950 = 24.1).
   Source: https://www.minneapolisfed.org/about-us/monetary-policy/inflation-calculator/consumer-price-index-1913-

   The deflator is held in a temporary scalar rather than in generated
   variables for two reasons:
     - gen stores new variables as float by default, and 24.1 cannot be
       represented exactly as a float, so the ratio would carry a small
       rounding error; a scalar keeps full double precision.
     - a scalar avoids adding two constant columns to every observation.
*/
	tempname cpi_ratio
	scalar `cpi_ratio' = 24.1 / 14

	foreach var of varlist incwage fam_income hh_income {
		replace `var' = `var' * `cpi_ratio'
	}
	
* Restrict race to black and white fathers and rescale the codes to 1 and 2.
* (No age restriction is applied here; per the file header the input extract is already limited to ages 30-50.)
	* Keep only the exact codes 100 and 200. A plain "race<=200" cut would also
	* retain any other code at or below 200, which race/100 would then turn into
	* 0 or a fractional value that matches neither race==1 nor race==2.
	* NOTE(review): assumes 100 and 200 are the white and black codes - confirm against the codebook.
	keep if inlist(race, 100, 200)
	replace race = race/100
	
/* Note: Per Census documentation, OCCSCORE is a constructed
         2-digit numeric variable that assigns occupational
         income scores to each occupation. OCCSCORE represents
         the median total income (in hundreds of 1950 dollars)
         of all persons with that particular occupation in 1950.
         The score is multiplied by 100 below so that it is
         expressed in dollars rather than hundreds of dollars.
*/
	replace occscore=occscore*100
	
* State identifier: tabulate it (including missing values), then rename it to fips
	tab statefip, m
	rename statefip fips

* Region: rebuild a four-category region from state FIPS codes.
* The region variable that came with the data is preserved as region_og.
	rename region region_og

	gen region = .

	* 1 = Northeast (9): Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island,
	*     Vermont, New Jersey, New York, Pennsylvania
	replace region = 1 if inlist(fips, 9, 23, 25, 33, 44, 50, 34, 36, 42)

	* 2 = Midwest (12): Illinois, Indiana, Michigan, Ohio, Wisconsin, Iowa, Kansas,
	*     Minnesota, Missouri, Nebraska, North Dakota, South Dakota
	replace region = 2 if inlist(fips, 17, 18, 26, 39, 55, 19, 20, 27, 29, 31, 38, 46)

	* 3 = South (17): Delaware, District of Columbia, Florida, Georgia, Maryland,
	*     North Carolina, South Carolina, Virginia, West Virginia, Alabama, Kentucky,
	*     Mississippi, Tennessee, Arkansas, Louisiana, Oklahoma, Texas
	replace region = 3 if inlist(fips, 10, 11, 12, 13, 24, 37, 45, 51, 54, 1, 21, 28, 47, 5, 22, 40, 48)

	* 4 = West (13): Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, Wyoming,
	*     California, Oregon, Washington, plus Alaska and Hawaii
	*     (the Census places AK and HI in the Pacific division)
	replace region = 4 if inlist(fips, 4, 8, 16, 30, 32, 35, 49, 56, 6, 41, 53, 2, 15)

	* Southern residence indicator: 1 when region is 3, otherwise 0
	gen south_merge = (region == 3)
	
* Education: collapse educ into five groups
*   1 = less than grade school (educ below 26)
*   2 = 8th grade              (educ equal to 26)
*   3 = less than high school  (educ equal to 30, 40 or 50)
*   4 = high school            (educ equal to 60)
*   5 = more than high school  (educ above 60 and below 999; 999 is the missing code)
* Any educ value that matches none of these rules leaves edu as "."
	gen edu = cond(educ < 26, 1, ///
	          cond(educ == 26, 2, ///
	          cond(inlist(educ, 30, 40, 50), 3, ///
	          cond(educ == 60, 4, ///
	          cond(educ > 60 & educ < 999, 5, .)))))
	tab edu, m
	

*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

****************
**** ASSIGN COARSENED OCCS
****************

* Order the data by Census occupation code
	sort occ1950

* Number of distinct Census occupation codes in the 1940 data:
* flag the first row within each occ1950 value and count the flags
	by occ1950: gen nvals = (_n == 1)
	count if nvals == 1 //215

* Occupations coded 200-290 are split by class of worker: rows with classwkr
* below 20 are moved to a separate code (original code + 1000)
* NOTE(review): classwkr below 20 is presumably the self-employed group - confirm against the codebook
	replace occ1950 = occ1950 + 1000 if inrange(occ1950, 200, 290) & classwkr < 20

* Attach the coarsened ANES occupation (occ1950ej) from the crosswalk
	merge m:1 occ1950 using ../Crosswalks/Crosswalk_1950Census_toANES.dta
	* Rows found only in the Census data must carry codes in the 980-995 range
	assert inrange(occ1950, 980, 995) if _merge == 1
	* Retain matched rows only
	drop if _merge != 3
	drop _merge

	* A missing coarsened code is only acceptable where occ1950 itself is missing;
	* such rows are removed
	assert occ1950 == . if occ1950ej == .
	drop if occ1950ej == .
	
*------------------------------------------------------------------------------------*
*------------------------------------------------------------------------------------*

***********************
*** SAVE VERSION FOR INCOME SCORES 
***********************

* Snapshot the data in memory so the full dataset is restored after the trimmed file is written
preserve 

	* Keep only the listed variables: coarsened occupation, race, South indicator,
	* the three income measures, education group, region, occscore, and every
	* variable whose name starts with number_children or number_people
	keep occ1950ej race south_merge incwage hh_income fam_income edu region occscore number_children* number_people*

	* Write the income-score input file, overwriting any existing copy
	save ./input/Census1940_fathers_ages30to50_forIncomeScores.dta, replace

restore 
	
***********************
*** SAVE VERSION FOR TSIV 
***********************

preserve

* Rename the coarsened occupation to fatheroccej and discard rows coded 99
	rename occ1950ej fatheroccej
	keep if fatheroccej != 99

* Retain only the variables used in this version of the file
	keep age race edu south_merge fatheroccej incwage fam_income hh_income

* Constant flag marking every row as a Census observation
	gen census = 1
	label var census "Census obs"

* Squared age
	gen agesq = age * age

* Shrink storage types, then write the file (overwriting any existing copy)
	compress
	save ./input/Census1940_fathers_ages30to50.dta, replace

restore
	

